#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2025/9/21 21:38
# @Author  : Dell
# @File    : doucment_loader.py
# @Software: PyCharm
# @Desc    : Document loaders

def web_tz(url: str = "https://www.baidu.com/") -> None:
    """Load a web page with ``HNLoader`` and print how many documents it returned.

    Args:
        url: Address of the page to load. The default is the URL that was
            previously hard-coded, so a bare ``web_tz()`` call behaves as before.

    NOTE(review): ``HNLoader`` is LangChain's Hacker News loader, while the
    default URL is not a Hacker News page -- confirm the intended target and
    pass a Hacker News URL if that is what should be scraped.
    """
    # Function-scope import: langchain_community is only required when this
    # function is actually called.
    from langchain_community.document_loaders import HNLoader

    documents = HNLoader(url).load()
    print(f"发现{len(documents)}网站")

def dz_book():
    """Load a Project Gutenberg plain-text e-book and print the loaded result."""
    from langchain_community.document_loaders import GutenbergLoader

    book_url = "https://www.gutenberg.org/cache/epub/2148/pg2148.txt"
    documents = GutenbergLoader(book_url).load()
    print(documents)

def url_web():
    """Load a website that has no fixed structure and print the loaded result."""
    from langchain_community.document_loaders import UnstructuredURLLoader

    documents = UnstructuredURLLoader(urls=["http://www.paulgraham.com/"]).load()
    print(documents)
# Script entry point: when this file is executed directly, run the url_web() demo.
if __name__ == "__main__":
    url_web()